Buckler et al. 2009¶

This file aims to reproduce the findings of Buckler et al 2009, "The Genetic Architecture of Maize Flowering Time".

It uses data from Panzea:

  • Phenotypic data panzea\phenotypes\Buckler_etal_2009_Science_flowering_time_data-090807\
  • Genotypic Data panzea\genotypes\GBS\v27\ZeaGBSv27_publicSamples_imputedV5_AGPv4-181023.vcf.gz
  • Genomic Data ...
In [ ]:
 
In [1]:
use_gpu_num = 1

import os
import pandas as pd
import numpy as np
import re

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn

# TODO fixme

device = "cuda" if torch.cuda.is_available() else "cpu"
# FIX: only pin a specific GPU when CUDA is actually available --
# torch.cuda.set_device raises on a CPU-only machine.
if device == "cuda" and use_gpu_num in [0, 1]:
    torch.cuda.set_device(use_gpu_num)
print(f"Using {device} device")

import tqdm

import plotly.graph_objects as go
import plotly.express as px

# [e for e in os.listdir() if re.match(".+\\.txt", e)]
/home/labmember/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
Using cuda device
In [2]:
# Flowering-trait BLUPs across 8 environments for the NAM population
# (Buckler et al. 2009); one row per entry, keyed by Geno_Code.
nam_overview = pd.read_table('../ext_data/zma/panzea/phenotypes/Buckler_etal_2009_Science_flowering_time_data-090807/NAMSum0607FloweringTraitBLUPsAcross8Envs.txt')
nam_overview
Out[2]:
Geno_Code Entry_ID Group pop entry Days_To_Anthesis_BLUP_Sum0607 Days_To_Silk_BLUP_Sum0607 ASI_BLUP_Sum0607
0 Z001E0001 04P1367A51A Z001 1 1 75.5364 77.1298 1.4600
1 Z001E0002 04P1368A51A Z001 1 2 76.9075 77.7945 1.3928
2 Z001E0003 04P1368B51A Z001 1 3 75.2646 75.2555 0.8644
3 Z001E0004 04P1370B51A Z001 1 4 73.6933 75.7604 2.0012
4 Z001E0005 04P1371B51A Z001 1 5 79.2441 81.2611 1.8931
... ... ... ... ... ... ... ... ...
5458 Z027E0277 W64A NaN 27 277 71.9008 73.9811 2.6756
5459 Z027E0278 WD NaN 27 278 62.0212 60.5992 -0.5733
5460 Z027E0279 Wf9 NaN 27 279 71.9970 72.2319 0.8338
5461 Z027E0280 Yu796_NS NaN 27 280 74.5107 73.9727 0.2935
5462 Z027E0282 Mo17 NaN 27 282 72.7428 75.5080 3.0455

5463 rows × 8 columns

In [3]:
# Marker genotypes plus the three phenotypes in one table; skiprows=1 drops
# an extra header line. The genotype codes end up in the index, so
# reset_index()/rename turns them back into a regular 'Geno_Code' column.
data = pd.read_table('../ext_data/zma/panzea/phenotypes/Buckler_etal_2009_Science_flowering_time_data-090807/markergenotypes062508.txt', skiprows=1
                    ).reset_index().rename(columns = {'index': 'Geno_Code'})
data
Out[3]:
Geno_Code days2anthesis days2silk asi pop i0 i1 i2 i3 i4 ... i1096 i1097 i1098 i1099 i1100 i1101 i1102 i1103 i1104 i1105
0 Z001E0001 75.5364 77.1298 1.4600 1 0.0 0.0 0.0 0.0 0.0 ... 2.0 2.0 2.0 2.0 2.0 1.0 0.0 0.0 0.0 0.0
1 Z001E0002 76.9075 77.7945 1.3928 1 2.0 2.0 2.0 2.0 2.0 ... 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0
2 Z001E0003 75.2646 75.2555 0.8644 1 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Z001E0004 73.6933 75.7604 2.0012 1 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 2.0 2.0 2.0 2.0
4 Z001E0005 79.2441 81.2611 1.8931 1 0.0 0.0 0.0 0.0 0.0 ... 2.0 2.0 2.0 2.0 2.0 1.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4694 Z026E0196 77.6523 80.2916 1.9698 26 0.0 0.0 0.0 0.0 0.0 ... 2.0 2.0 2.0 2.0 2.0 2.0 1.0 1.0 1.0 1.0
4695 Z026E0197 78.5015 82.2767 3.2979 26 2.0 2.0 2.0 2.0 2.0 ... 0.5 0.5 0.5 0.5 0.5 0.5 1.0 1.0 1.0 1.0
4696 Z026E0198 77.4219 79.7868 2.2208 26 1.0 1.0 1.0 1.0 2.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4697 Z026E0199 78.6712 82.8476 4.1247 26 2.0 2.0 0.0 0.0 0.0 ... 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0
4698 Z026E0200 77.4937 82.4678 4.2915 26 0.0 0.0 0.0 2.0 2.0 ... 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0 2.0

4699 rows × 1111 columns

In [4]:
# Pairwise scatter matrix of the three flowering traits
px.scatter_matrix(data.loc[:, ['days2anthesis', 'days2silk', 'asi']])
/home/labmember/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/plotly/express/_core.py:279: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  dims = [
In [ ]:
 
In [ ]:
 
In [5]:
# Phenotypes as numpy arrays
d2a = np.array(data['days2anthesis'])
d2s = np.array(data['days2silk'])
asi = np.array(data['asi'])

# Genotype matrix: everything except the phenotypes and identifiers
xs = np.array(data.drop(columns = ['days2anthesis', 'days2silk', 'asi', 'pop', 'Geno_Code']))

n_obs = xs.shape[0]

np_seed = 9070707
rng = np.random.default_rng(np_seed)  # seeded for a reproducible split

test_pr = 0.2  # fraction of observations held out for testing

test_n = round(n_obs*test_pr)
idxs = np.arange(n_obs)  # same values as linspace(0, n_obs-1, n_obs).astype(int)
rng.shuffle(idxs)

test_idxs = idxs[0:test_n]
# FIX: was idxs[test_n:-1], which silently dropped the final shuffled index,
# leaving one observation in neither the train nor the test split.
train_idxs = idxs[test_n:]
In [6]:
# make up tensors
def calc_cs(x):
    """Column-wise center/scale statistics of ``x``: a ``[mean, std]`` pair."""
    mu = np.mean(x, axis = 0)
    sigma = np.std(x, axis = 0)
    return [mu, sigma]
In [7]:
def apply_cs(xs, cs_dict_entry):
    """Center and scale ``xs`` using a ``[mean, std]`` pair from ``calc_cs``.

    FIX: the original divided by ``cs_dict_entry[0]`` (the mean), so the
    result was not a z-score; the denominator must be ``cs_dict_entry[1]``
    (the standard deviation).
    """
    return (xs - cs_dict_entry[0]) / cs_dict_entry[1]
In [ ]:
 
In [8]:
# Center/scale statistics ([mean, std]) computed on the TRAIN split only,
# so no test information leaks into the standardization.
scale_dict = {
    'd2a':calc_cs(d2a[train_idxs]),
    'd2s':calc_cs(d2s[train_idxs]),
    'asi':calc_cs(asi[train_idxs]),
    'xs' :calc_cs(xs[train_idxs])
}
In [9]:
# Standardize each phenotype with the train-split statistics
y1 = apply_cs(d2a, scale_dict['d2a'])
y2 = apply_cs(d2s, scale_dict['d2s'])
y3 = apply_cs(asi, scale_dict['asi'])

# No need to cs xs -- 0-2 scale
# apply_cs(xs, scale_dict['xs'])

def _to_dev(arr, idx):
    """Slice a numpy array and move it to the active device as float32."""
    return torch.from_numpy(arr[idx]).to(device).float()

# Targets get a trailing singleton axis -> column vectors (n, 1)
y1_train = _to_dev(y1, train_idxs)[:, None]
y2_train = _to_dev(y2, train_idxs)[:, None]
y3_train = _to_dev(y3, train_idxs)[:, None]
xs_train = _to_dev(xs, train_idxs)

y1_test = _to_dev(y1, test_idxs)[:, None]
y2_test = _to_dev(y2, test_idxs)[:, None]
y3_test = _to_dev(y3, test_idxs)[:, None]
xs_test = _to_dev(xs, test_idxs)
In [10]:
class CustomDataset(Dataset):
    """Dataset yielding one ``(xs, y1, y2, y3)`` tuple per observation.

    Parameters
    ----------
    y1, y2, y3 : indexable targets (anthesis, silking, ASI)
    xs : indexable genotype features
    transform : optional callable applied to the features
    target_transform : optional callable applied to each of the targets
    """

    def __init__(self, y1, y2, y3, xs, transform = None, target_transform = None):
        self.y1 = y1
        self.y2 = y2
        self.y3 = y3
        self.xs = xs
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.y1)

    def __getitem__(self, idx):
        y1_idx = self.y1[idx]
        y2_idx = self.y2[idx]
        y3_idx = self.y3[idx]
        xs_idx = self.xs[idx]

        if self.transform:
            xs_idx = self.transform(xs_idx)

        if self.target_transform:
            # FIX: the original called self.transform here, which crashes
            # (None is not callable) when only target_transform is given
            # and otherwise applies the wrong transform to the targets.
            y1_idx = self.target_transform(y1_idx)
            y2_idx = self.target_transform(y2_idx)
            y3_idx = self.target_transform(y3_idx)
        return xs_idx, y1_idx, y2_idx, y3_idx
In [11]:
# Wrap the train/test tensors in mini-batch (64) DataLoaders.
training_dataloader = DataLoader(
    CustomDataset(
        y1 = y1_train,
        y2 = y2_train,
        y3 = y3_train,
        xs = xs_train
    ), 
    batch_size = 64, 
    shuffle = True)

# NOTE(review): shuffle=True on the test loader is unnecessary (losses are
# averaged over batches, so order does not matter) but harmless.
testing_dataloader = DataLoader(
    CustomDataset(
        y1 = y1_test,
        y2 = y2_test,
        y3 = y3_test,
        xs = xs_test
    ), 
    batch_size = 64, 
    shuffle = True)

xs.shape
Out[11]:
(4699, 1106)

Version 1, Predict y1 (Anthesis)¶

In [12]:
class NeuralNetwork(nn.Module):
    """Single-hidden-layer MLP: 1106 markers -> 64 (BN + ReLU) -> 1 trait."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        hidden = 64
        self.x_network = nn.Sequential(
            nn.Linear(1106, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, x):
        return self.x_network(x)

# Move the freshly initialized model to the selected device (GPU if available)
model = NeuralNetwork().to(device)
# print(model)
In [13]:
xs_i, y1_i, y2_i, y3_i = next(iter(training_dataloader))
model(xs_i).shape # try prediction on one batch
Out[13]:
torch.Size([64, 1])
In [14]:
def train_loop(dataloader, model, loss_fn, optimizer, silent = False):
    """Run one epoch of optimization over ``dataloader``, fitting y1 only."""
    size = len(dataloader.dataset)
    for batch, (xs_i, y1_i, y2_i, y3_i) in enumerate(dataloader):
        # Forward pass: only y1 (anthesis) is the target in this version
        pred = model(xs_i)
        loss = loss_fn(pred, y1_i)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress report (every 100 batches)
        if batch % 100 == 0 and not silent:
            seen = batch * len(y1_i)
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

                
def train_error(dataloader, model, loss_fn, silent = False):
    """Mean per-batch y1 loss of ``model`` over the training dataloader.

    FIX: the model is put in eval mode for the measurement (and the previous
    mode restored afterwards); the original left BatchNorm in training mode,
    so the reported error used -- and updated -- batch statistics.
    """
    num_batches = len(dataloader)
    train_loss = 0

    was_training = model.training
    model.eval()
    with torch.no_grad():
        for xs_i, y1_i, y2_i, y3_i in dataloader:
            pred = model(xs_i)
            train_loss += loss_fn(pred, y1_i).item()
    model.train(was_training)

    train_loss /= num_batches
    return(train_loss)

            
def test_loop(dataloader, model, loss_fn, silent = False):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for xs_i, y1_i, y2_i, y3_i in dataloader:
            pred = model(xs_i)
            test_loss += loss_fn(pred, y1_i).item() # <-----------------------

    test_loss /= num_batches
    if not silent:
        print(f"Test Error: Avg loss: {test_loss:>8f}")
    return(test_loss) 


def train_nn(
    training_dataloader,
    testing_dataloader,
    model,
    learning_rate = 1e-3,
    batch_size = 64,
    epochs = 500
):
    """Train ``model`` for ``epochs`` epochs, recording train/test MSE.

    Returns ``[model, loss_df]`` where ``loss_df`` has columns
    Epoch / TrainMSE / TestMSE, one row per epoch.

    NOTE(review): ``batch_size`` is accepted for API symmetry but unused --
    batching is fixed when the dataloaders are constructed.
    """
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    loss_df = pd.DataFrame([i for i in range(epochs)], columns = ['Epoch'])
    loss_df['TrainMSE'] = np.nan
    loss_df['TestMSE']  = np.nan

    for t in tqdm.tqdm(range(epochs)):
        train_loop(training_dataloader, model, loss_fn, optimizer, silent = True)
        loss_df.loc[t, 'TrainMSE'] = train_error(training_dataloader, model, loss_fn, silent = True)
        loss_df.loc[t, 'TestMSE'] = test_loop(testing_dataloader, model, loss_fn, silent = True)

    return([model, loss_df])
In [15]:
model, loss_df = train_nn(
    training_dataloader,
    testing_dataloader,
    model,
    learning_rate = 1e-3,
    batch_size = 64,
    epochs = 500
)
100%|████████████████████████████████████████████| 500/500 [00:50<00:00,  9.96it/s]
In [16]:
# Learning curves: train vs test MSE per epoch
fig = go.Figure()
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TrainMSE,
                    mode='lines', name='Train'))
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TestMSE,
                    mode='lines', name='Test'))
fig.show()
In [34]:
# ! conda install captum -c pytorch -y
In [19]:
# imports from captum library
from captum.attr import LayerConductance, LayerActivation, LayerIntegratedGradients
from captum.attr import IntegratedGradients, DeepLift, GradientShap, NoiseTunnel, FeatureAblation
In [ ]:
 
In [20]:
# Feature-attribution methods from captum, wrapped around the trained model.
ig = IntegratedGradients(model)
ig_nt = NoiseTunnel(ig)
dl = DeepLift(model)  # NOTE(review): 'dl' is easy to confuse with a dataloader name
gs = GradientShap(model)
fa = FeatureAblation(model)

# Attribute each test prediction to the 1106 markers.
# GradientShap takes a baseline distribution; the train tensors serve here.
ig_attr_test = ig.attribute(xs_test, n_steps=50)
ig_nt_attr_test = ig_nt.attribute(xs_test)
dl_attr_test = dl.attribute(xs_test)
gs_attr_test = gs.attribute(xs_test, xs_train)
fa_attr_test = fa.attribute(xs_test)
/home/labmember/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/captum/_utils/gradient.py:57: UserWarning:

Input Tensor 0 did not already require gradients, required_grads has been set automatically.

/home/labmember/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/captum/attr/_core/deep_lift.py:304: UserWarning:

Setting forward, backward hooks and attributes on non-linear
               activations. The hooks and attributes will be removed
            after the attribution is finished

In [21]:
# Sanity check: every attribution matrix is (n_test, n_markers)
[e.shape for e in [ig_attr_test,
ig_nt_attr_test,
dl_attr_test,
gs_attr_test,
fa_attr_test]]
Out[21]:
[torch.Size([940, 1106]),
 torch.Size([940, 1106]),
 torch.Size([940, 1106]),
 torch.Size([940, 1106]),
 torch.Size([940, 1106])]
In [22]:
# Mean attribution per marker for each method.
# FIX: all four traces were labeled 'Test', making the legend useless;
# each method now gets its own legend entry.
marker_idx = np.linspace(0, 1106-1, 1106)
fig = go.Figure()
fig.add_trace(go.Scatter(x = marker_idx,
                         y = ig_nt_attr_test.cpu().detach().numpy().mean(axis=0),
                         mode='lines', name='IG (NoiseTunnel)'))
fig.add_trace(go.Scatter(x = marker_idx,
                         y = dl_attr_test.cpu().detach().numpy().mean(axis=0),
                         mode='lines', name='DeepLift'))
fig.add_trace(go.Scatter(x = marker_idx,
                         y = gs_attr_test.cpu().detach().numpy().mean(axis=0),
                         mode='lines', name='GradientShap'))
fig.add_trace(go.Scatter(x = marker_idx,
                         y = fa_attr_test.cpu().detach().numpy().mean(axis=0),
                         mode='lines', name='FeatureAblation'))
fig.show()
In [23]:
# One attribution value per marker (1106) after averaging over test rows
len(dl_attr_test.cpu().detach().numpy().mean(axis = 0))
Out[23]:
1106
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

Version 2, Predict y1 (Anthesis), y2 (Silking), and y3 (ASI)¶

Here the model predicts 3 values per observation. The loss function is still MSE, but the three target tensors are concatenated column-wise.

In [37]:
class NeuralNetwork(nn.Module):
    """MLP over 1106 markers predicting all three traits (y1, y2, y3)."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        hidden = 64
        self.x_network = nn.Sequential(
            nn.Linear(1106, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Linear(hidden, 3),
        )

    def forward(self, x):
        return self.x_network(x)

# NOTE(review): this rebinds `model` to the new 3-output NeuralNetwork,
# shadowing the single-output Version 1 definitions above -- intentional.
model = NeuralNetwork().to(device)
# print(model)
In [38]:
xs_i, y1_i, y2_i, y3_i = next(iter(training_dataloader))
model(xs_i).shape # try prediction on one batch
Out[38]:
torch.Size([64, 3])
In [ ]:
 
In [ ]:
 
In [39]:
def train_loop(dataloader, model, loss_fn, optimizer, silent = False):
    """One epoch of optimization; the target is [y1 | y2 | y3] concatenated."""
    size = len(dataloader.dataset)
    for batch, (xs_i, y1_i, y2_i, y3_i) in enumerate(dataloader):
        # All three traits are fit jointly: stack them into an (n, 3) target
        target = torch.concat([y1_i, y2_i, y3_i], axis = 1)
        pred = model(xs_i)
        loss = loss_fn(pred, target)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress report (every 100 batches)
        if batch % 100 == 0 and not silent:
            seen = batch * len(y1_i)
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

                
def train_error(dataloader, model, loss_fn, silent = False):
    """Mean per-batch loss on the 3-column target [y1 | y2 | y3].

    FIX: the model is put in eval mode for the measurement (and the previous
    mode restored afterwards); the original left BatchNorm in training mode,
    so the reported error used -- and updated -- batch statistics.
    """
    num_batches = len(dataloader)
    train_loss = 0

    was_training = model.training
    model.eval()
    with torch.no_grad():
        for xs_i, y1_i, y2_i, y3_i in dataloader:
            pred = model(xs_i)
            train_loss += loss_fn(pred, torch.concat([y1_i, y2_i, y3_i], axis = 1)).item()
    model.train(was_training)

    train_loss /= num_batches
    return(train_loss)

            
def test_loop(dataloader, model, loss_fn, silent = False):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss = 0

    with torch.no_grad():
        for xs_i, y1_i, y2_i, y3_i in dataloader:
            pred = model(xs_i)
            test_loss += loss_fn(pred, torch.concat([y1_i, y2_i, y3_i], axis = 1)).item() # <-----------------------

    test_loss /= num_batches
    if not silent:
        print(f"Test Error: Avg loss: {test_loss:>8f}")
    return(test_loss) 


def train_nn(
    training_dataloader,
    testing_dataloader,
    model,
    learning_rate = 1e-3,
    batch_size = 64,
    epochs = 500
):
    """Train the 3-output ``model`` for ``epochs`` epochs, logging MSE.

    Returns ``[model, loss_df]`` where ``loss_df`` has columns
    Epoch / TrainMSE / TestMSE, one row per epoch.

    NOTE(review): ``batch_size`` is accepted for API symmetry but unused --
    batching is fixed when the dataloaders are constructed.
    """
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    loss_df = pd.DataFrame([i for i in range(epochs)], columns = ['Epoch'])
    loss_df['TrainMSE'] = np.nan
    loss_df['TestMSE']  = np.nan

    for t in tqdm.tqdm(range(epochs)):
        train_loop(training_dataloader, model, loss_fn, optimizer, silent = True)
        loss_df.loc[t, 'TrainMSE'] = train_error(training_dataloader, model, loss_fn, silent = True)
        loss_df.loc[t, 'TestMSE'] = test_loop(testing_dataloader, model, loss_fn, silent = True)

    return([model, loss_df])
In [40]:
# Train the 3-output model.
# NOTE(review): the recorded run was interrupted by hand at ~68%
# (KeyboardInterrupt traceback below); re-run this cell to completion.
model, loss_df = train_nn(
    training_dataloader,
    testing_dataloader,
    model,
    learning_rate = 1e-3,
    batch_size = 64,
    epochs = 500
)
 68%|█████████████████████████████▊              | 339/500 [00:36<00:17,  9.16it/s]
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[40], line 1
----> 1 model, loss_df = train_nn(
      2     training_dataloader,
      3     testing_dataloader,
      4     model,
      5     learning_rate = 1e-3,
      6     batch_size = 64,
      7     epochs = 500
      8 )

Cell In[39], line 67, in train_nn(training_dataloader, testing_dataloader, model, learning_rate, batch_size, epochs)
     63 loss_df['TestMSE']  = np.nan
     65 for t in tqdm.tqdm(range(epochs)):
     66     # print(f"Epoch {t+1}\n-------------------------------")
---> 67     train_loop(training_dataloader, model, loss_fn, optimizer, silent = True)
     69     loss_df.loc[loss_df.index == t, 'TrainMSE'
     70                ] = train_error(training_dataloader, model, loss_fn, silent = True)
     72     loss_df.loc[loss_df.index == t, 'TestMSE'
     73                ] = test_loop(testing_dataloader, model, loss_fn, silent = True)

Cell In[39], line 10, in train_loop(dataloader, model, loss_fn, optimizer, silent)
      8 # Backpropagation
      9 optimizer.zero_grad()
---> 10 loss.backward()
     11 optimizer.step()
     13 if batch % 100 == 0:

File ~/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/torch/_tensor.py:487, in Tensor.backward(self, gradient, retain_graph, create_graph, inputs)
    477 if has_torch_function_unary(self):
    478     return handle_torch_function(
    479         Tensor.backward,
    480         (self,),
   (...)
    485         inputs=inputs,
    486     )
--> 487 torch.autograd.backward(
    488     self, gradient, retain_graph, create_graph, inputs=inputs
    489 )

File ~/mambaforge/envs/pytorch_mamba/lib/python3.10/site-packages/torch/autograd/__init__.py:197, in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    192     retain_graph = create_graph
    194 # The reason we repeat same the comment below is that
    195 # some Python versions print out the first line of a multi-line function
    196 # calls in the traceback and some print out the last line
--> 197 Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198     tensors, grad_tensors_, retain_graph, create_graph, inputs,
    199     allow_unreachable=True, accumulate_grad=True)

KeyboardInterrupt: 
In [54]:
# NOTE(review): the cell above raised before train_nn returned, so the
# `model, loss_df = ...` assignment never ran -- `loss_df` here is still
# the one from the Version 1 training. Re-run the training cell first.
fig = go.Figure()
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TrainMSE,
                    mode='lines', name='Train'))
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TestMSE,
                    mode='lines', name='Test'))
fig.show()
In [55]:
# Continue training the SAME model instance for 5000 epochs -- a warm
# start, since train_nn updates the model it is given in place.
model, loss_df = train_nn(
    training_dataloader,
    testing_dataloader,
    model,
    learning_rate = 1e-3,
    batch_size = 64,
    epochs = 5000
)
100%|██████████████████████████████████████████| 5000/5000 [07:11<00:00, 11.58it/s]
In [56]:
# Learning curves for the long (5000-epoch) run
fig = go.Figure()
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TrainMSE,
                    mode='lines', name='Train'))
fig.add_trace(go.Scatter(x=loss_df.Epoch, y=loss_df.TestMSE,
                    mode='lines', name='Test'))
fig.show()
In [ ]:
 
In [ ]:
 
In [25]:
# NOTE(review): the recorded output of this cell shows a different string
# than the literal below -- stale out-of-order execution; re-run to refresh.
'../ext_data/zma/panzea/phenotypes/'
Out[25]:
'../ext_data/zma/panzea/phenotypes/traitMatrix_maize282NAM_v15-130212.txt'
In [27]:
# pd.read_table('../ext_data/zma/panzea/phenotypes/traitMatrix_maize282NAM_v15-130212.txt', low_memory = False)
Out[27]:
<Trait> GerminationCount GerminationCount.1 GerminationCount.2 GerminationCount.3 GerminationCount.4 GerminationCount.5 GerminationCount.6 StandCount StandCount.1 ... 20KernelWeight 20KernelWeight.1 20KernelWeight.2 20KernelWeight.3 20KernelWeight.4 20KernelWeight.5 20KernelWeight.6 RowQuality RowQuality.1 HerbicideSensitivity
0 <Header name=env> 65.0 06A 06CL1 06FL1 07K 08A 26M3 06CL1 06PR ... 06A 06CL1 06FL1 06PR 07A 07CL1 08A 06A 07FL1 08A
1 33-16 13.0 3 10.5 6 NaN 1 2 7 8 ... NaN 5.0325 NaN 5.1 NaN 3.95 NaN 4 3 NaN
2 38-11 12.0 6 12 6 NaN 10 3 8 8 ... NaN NaN NaN 3.4 4.3 4.905 4.6 3 3 NaN
3 4226 10.0 1 11 5 NaN 8 3 3.5 8 ... NaN 4.385 NaN NaN 3.3 5.1 4 3 3 NaN
4 4722 13.0 6 10.5 5 NaN 2 7 7.5 8 ... 3.5 NaN NaN 3 3 2.5 2.8 1 3 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5702 Z026E0222 NaN NaN NaN NaN NaN 15 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 3.85 NaN NaN NaN
5703 Z026E0225 NaN NaN NaN NaN NaN 11 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 4.5 NaN NaN NaN
5704 Z026E0230 NaN NaN NaN NaN NaN 8 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 4.1 NaN NaN NaN
5705 Z026E0231 NaN NaN NaN NaN NaN 10 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 3.95 NaN NaN NaN
5706 Z026E0232 NaN NaN NaN NaN NaN 12 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN 4.6 NaN NaN NaN

5707 rows × 286 columns

In [33]:
# pd.read_excel('../ext_data/zma/panzea/phenotypes/traitMatrix_maize282NAM_v15-130212_TraitDescritptions.xlsx')
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: